[AMDGPU] Use 64-bit literals in codegen on gfx1250 #148727
Conversation
@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

Patch is 45.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148727.diff

6 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 2540921b75e5d..9adf6f7cb1b8f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -447,6 +447,35 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
return;
}
+ bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
+ if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&
+ CurDAG->isConstantValueOfAnyType(SDValue(N, 0))) {
+ uint64_t C = 0;
+ bool AllConst = true;
+ unsigned EltSize = EltVT.getSizeInBits();
+ for (unsigned I = 0; I < NumVectorElts; ++I) {
+ SDValue Op = N->getOperand(I);
+ if (Op.isUndef()) {
+ AllConst = false;
+ break;
+ }
+ uint64_t Val;
+ if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Op)) {
+ Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
+ } else
+ Val = cast<ConstantSDNode>(Op)->getZExtValue();
+ C |= Val << (EltSize * I);
+ }
+ if (AllConst) {
+ SDValue CV = CurDAG->getTargetConstant(C, DL, MVT::i64);
+ MachineSDNode *Copy = CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO,
+ DL, VT, CV);
+ CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0),
+ RegClass);
+ return;
+ }
+ }
+
assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
"supported yet");
// 32 = Max Num Vector Elements
@@ -454,7 +483,6 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
// 1 = Vector Register Class
SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
- bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
@@ -676,7 +704,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::Constant:
case ISD::ConstantFP: {
- if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
+ if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||
+ Subtarget->has64BitLiterals())
break;
uint64_t Imm;
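For illustration, here is a standalone sketch (plain C++, not the LLVM API; the element values are made up) of the packing performed by the SelectBuildVector change above: each constant element is shifted by its bit offset and OR-ed into a single 64-bit immediate, which S_MOV_B64_IMM_PSEUDO can then materialize with one 64-bit literal on gfx1250.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Hypothetical v2i32 build_vector of constants.
  std::vector<uint32_t> Elts = {0xDEADBEEFu, 0x01234567u};
  const unsigned EltSize = 32; // element size in bits

  uint64_t C = 0;
  for (unsigned I = 0; I < Elts.size(); ++I)
    C |= uint64_t(Elts[I]) << (EltSize * I); // same packing as the patch

  // With 64-bit literals the whole vector becomes one s_mov_b64 immediate
  // instead of a REG_SEQUENCE of two 32-bit moves.
  std::printf("packed immediate: 0x%016llx\n", (unsigned long long)C);
  return 0;
}
```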
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e90316cee12fe..21bd017540b09 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12155,6 +12155,11 @@ SDValue SITargetLowering::splitBinaryBitConstantOp(
if ((bitOpWithConstantIsReducible(Opc, ValLo) ||
bitOpWithConstantIsReducible(Opc, ValHi)) ||
(CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) {
+ // We have 64-bit scalar and/or/xor, but do not have vector forms.
+ if (Subtarget->has64BitLiterals() && CRHS->hasOneUse() &&
+ !CRHS->user_begin()->isDivergent())
+ return SDValue();
+
// If we need to materialize a 64-bit immediate, it will be split up later
// anyway. Avoid creating the harder to understand 64-bit immediate
// materialization.
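The early exit added to splitBinaryBitConstantOp can be restated as a small predicate; this is a hedged sketch (the function and its parameters are illustrative, not LLVM's API): when 64-bit literals are available and the constant's single user is uniform, the scalar 64-bit and/or/xor can consume the literal directly, so the constant is left whole.

```cpp
// Illustrative only: mirrors the condition added to splitBinaryBitConstantOp.
bool shouldSplit64BitBitOpConstant(bool Has64BitLiterals, bool HasOneUse,
                                   bool UserIsDivergent) {
  // Scalar s_and_b64/s_or_b64/s_xor_b64 exist and can take a 64-bit literal,
  // but there are no 64-bit vector forms, so only uniform users benefit.
  if (Has64BitLiterals && HasOneUse && !UserIsDivergent)
    return false; // keep the 64-bit constant whole
  return true;    // otherwise fall through to the existing splitting heuristics
}
```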
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4c5f938831243..20a8da4a317db 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2273,6 +2273,12 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
const MachineOperand &SrcOp = MI.getOperand(1);
assert(!SrcOp.isFPImm());
+
+ if (ST.has64BitLiterals()) {
+ MI.setDesc(get(AMDGPU::S_MOV_B64));
+ break;
+ }
+
APInt Imm(64, SrcOp.getImm());
if (Imm.isIntN(32) || isInlineConstant(Imm)) {
MI.setDesc(get(AMDGPU::S_MOV_B64));
@@ -6099,14 +6105,18 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP32;
if (Is64BitOp &&
!AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm())) {
- if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp))
+ if (!AMDGPU::isValid32BitLiteral(Imm, Is64BitFPOp) &&
+ (!ST.has64BitLiterals() || InstDesc.getSize() != 4))
return false;
// FIXME: We can use sign extended 64-bit literals, but only for signed
// operands. At the moment we do not know if an operand is signed.
// Such operand will be encoded as its low 32 bits and then either
// correctly sign extended or incorrectly zero extended by HW.
- if (!Is64BitFPOp && (int32_t)Imm < 0)
+ // If 64-bit literals are supported and the literal will be encoded
+ // as full 64 bit we still can use it.
+ if (!Is64BitFPOp && (int32_t)Imm < 0 &&
+ (!ST.has64BitLiterals() || AMDGPU::isValid32BitLiteral(Imm, false)))
return false;
}
}
@@ -9178,15 +9188,30 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (isDPP(MI))
return DescSize;
bool HasLiteral = false;
+ unsigned LiteralSize = 4;
for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
const MachineOperand &Op = MI.getOperand(I);
const MCOperandInfo &OpInfo = Desc.operands()[I];
if (!Op.isReg() && !isInlineConstant(Op, OpInfo)) {
HasLiteral = true;
+ if (ST.has64BitLiterals()) {
+ switch(OpInfo.OperandType) {
+ default:
+ break;
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ if (!AMDGPU::isValid32BitLiteral(Op.getImm(), true))
+ LiteralSize = 8;
+ break;
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ if (!Op.isImm() || !AMDGPU::isValid32BitLiteral(Op.getImm(), false))
+ LiteralSize = 8;
+ break;
+ }
+ }
break;
}
}
- return HasLiteral ? DescSize + 4 : DescSize;
+ return HasLiteral ? DescSize + LiteralSize : DescSize;
}
// Check whether we have extra NSA words.
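The getInstSizeInBytes change above amounts to a small size model, sketched here with illustrative names (not the LLVM API): a trailing literal normally adds 4 bytes, but on targets with 64-bit literals a 64-bit int/fp immediate that cannot be encoded as a 32-bit literal adds 8.

```cpp
#include <cassert>

// Illustrative model of the size accounting in getInstSizeInBytes.
unsigned literalSizeInBytes(bool Has64BitLiterals, bool Is64BitOperand,
                            bool FitsIn32BitLiteral) {
  if (Has64BitLiterals && Is64BitOperand && !FitsIn32BitLiteral)
    return 8; // a full 64-bit literal is appended to the instruction
  return 4;   // classic 32-bit literal dword
}

unsigned instSizeInBytes(unsigned DescSize, bool HasLiteral,
                         unsigned LiteralSize) {
  return HasLiteral ? DescSize + LiteralSize : DescSize;
}

int main() {
  // A 4-byte encoding with a 64-bit immediate that really needs all 64 bits:
  assert(instSizeInBytes(4, true, literalSizeInBytes(true, true, false)) == 12);
  // The same instruction when the immediate still fits a 32-bit literal:
  assert(instSizeInBytes(4, true, literalSizeInBytes(true, true, true)) == 8);
  return 0;
}
```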
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index fd39b8a1350c6..4a4b865dc5d1d 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -1058,7 +1058,11 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
// fold an immediate into the shrunk instruction as a literal operand. In
// GFX10 VOP3 instructions can take a literal operand anyway, so there is
// no advantage to doing this.
- if (ST->hasVOP3Literal() && !IsPostRA)
+ // However, if 64-bit literals are allowed we still need to shrink it
+ // for such literal to be able to fold.
+ if (ST->hasVOP3Literal() &&
+ (!ST->has64BitLiterals() || AMDGPU::isTrue16Inst(MI.getOpcode())) &&
+ !IsPostRA)
continue;
if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
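The new gating in SIShrinkInstructions can likewise be read as a small predicate (illustrative names, not the actual pass code): with VOP3 literals available the pass used to skip shrinking before RA, but on targets with 64-bit literals it keeps trying to shrink non-true16 instructions so that such a literal can still be folded.

```cpp
// Illustrative only: true means "skip shrinking this instruction here".
bool skipShrinkForVOP3Literal(bool HasVOP3Literal, bool Has64BitLiterals,
                              bool IsTrue16Inst, bool IsPostRA) {
  return HasVOP3Literal && (!Has64BitLiterals || IsTrue16Inst) && !IsPostRA;
}
```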
diff --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
index ac03d2dae8fa8..dea9142cf2bee 100644
--- a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
+++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
@@ -1,8 +1,10 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9,NOT-GFX12 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10,NOT-GFX12 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1100,NOT-GFX12 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1150,NOT-GFX12 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1200 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
declare float @llvm.fabs.f32(float)
declare float @llvm.fma.f32(float, float, float)
@@ -35,11 +37,19 @@ define float @v_mul_f32_vop2(float %x, float %y) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_mul_f32_vop2:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%mul = fmul float %x, %y
ret float %mul
}
; NOT-GFX12: codeLenInByte = 12
; GFX1200: codeLenInByte = 28
+; GFX1250: codeLenInByte = 16
define float @v_mul_f32_vop2_inline_imm(float %x) {
; GFX9-LABEL: v_mul_f32_vop2_inline_imm:
@@ -69,11 +79,19 @@ define float @v_mul_f32_vop2_inline_imm(float %x) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_mul_f32_vop2_inline_imm:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%mul = fmul float %x, 4.0
ret float %mul
}
; NOT-GFX12: codeLenInByte = 12
; GFX1200: codeLenInByte = 28
+; GFX1250: codeLenInByte = 16
define float @v_mul_f32_vop2_literal(float %x) {
; GFX9-LABEL: v_mul_f32_vop2_literal:
@@ -103,11 +121,19 @@ define float @v_mul_f32_vop2_literal(float %x) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_mul_f32_vop2_literal:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%mul = fmul float %x, 123.0
ret float %mul
}
; NOT-GFX12: codeLenInByte = 16
; GFX1200: codeLenInByte = 32
+; GFX1250: codeLenInByte = 20
define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
; GFX9-LABEL: v_mul_f32_vop3_src_mods:
@@ -137,12 +163,20 @@ define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_mul_f32_vop3_src_mods:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%fabs.x = call float @llvm.fabs.f32(float %x)
%mul = fmul float %fabs.x, %y
ret float %mul
}
; NOT-GFX12: codeLenInByte = 16
; GFX1200: codeLenInByte = 32
+; GFX1250: codeLenInByte = 20
define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
@@ -172,6 +206,13 @@ define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%fabs.x = call float @llvm.fabs.f32(float %x)
%mul = fmul float %fabs.x, 4.0
ret float %mul
@@ -179,6 +220,7 @@ define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
; NOT-GFX12: codeLenInByte = 16
; GFX1200: codeLenInByte = 32
+; GFX1250: codeLenInByte = 20
define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal:
@@ -209,6 +251,13 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_mul_f32_vop3_src_mods_literal:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%fabs.x = call float @llvm.fabs.f32(float %x)
%mul = fmul float %fabs.x, 123.0
ret float %mul
@@ -218,6 +267,7 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
; GFX10: codeLenInByte = 20
; GFX11: codeLenInByte = 20
; GFX1200: codeLenInByte = 36
+; GFX1250: codeLenInByte = 24
define float @v_mul_f32_vop2_frame_index(float %x) {
; GFX9-LABEL: v_mul_f32_vop2_frame_index:
@@ -249,6 +299,13 @@ define float @v_mul_f32_vop2_frame_index(float %x) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_mul_f32_vop2_frame_index:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%alloca = alloca i32, addrspace(5)
%ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
%cast = bitcast i32 %ptrtoint to float
@@ -260,6 +317,7 @@ define float @v_mul_f32_vop2_frame_index(float %x) {
; GFX10: codeLenInByte = 20
; GFX11: codeLenInByte = 12
; GFX1200: codeLenInByte = 28
+; GFX1250: codeLenInByte = 16
define float @v_fma_f32(float %x, float %y, float %z) {
; GFX9-LABEL: v_fma_f32:
@@ -289,12 +347,20 @@ define float @v_fma_f32(float %x, float %y, float %z) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_fma_f32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%fma = call float @llvm.fma.f32(float %x, float %y, float %z)
ret float %fma
}
; NOT-GFX12: codeLenInByte = 16
; GFX1200: codeLenInByte = 32
+; GFX1250: codeLenInByte = 20
define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
; GFX9-LABEL: v_fma_f32_src_mods:
@@ -324,6 +390,13 @@ define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_fma_f32_src_mods:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%fabs.x = call float @llvm.fabs.f32(float %x)
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
ret float %fma
@@ -331,6 +404,7 @@ define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
; NOT-GFX12: codeLenInByte = 16
; GFX1200: codeLenInByte = 32
+; GFX1250: codeLenInByte = 20
define float @v_fmac_f32(float %x, float %y) {
; GFX9-LABEL: v_fmac_f32:
@@ -360,6 +434,13 @@ define float @v_fmac_f32(float %x, float %y) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_fmac_f32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%fma = call float @llvm.fma.f32(float %x, float %y, float %x)
ret float %fma
}
@@ -368,6 +449,7 @@ define float @v_fmac_f32(float %x, float %y) {
; GFX10: codeLenInByte = 12
; GFX11: codeLenInByte = 12
; GFX1200: codeLenInByte = 28
+; GFX1250: codeLenInByte = 16
define float @v_fmaak_f32(float %x, float %y) {
; GFX9-LABEL: v_fmaak_f32:
@@ -398,6 +480,13 @@ define float @v_fmaak_f32(float %x, float %y) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_fmaak_f32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
%fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
ret float %fma
}
@@ -406,6 +495,7 @@ define float @v_fmaak_f32(float %x, float %y) {
; GFX10: codeLenInByte = 16
; GFX11: codeLenInByte = 16
; GFX1200: codeLenInByte = 32
+; GFX1250: codeLenInByte = 20
define float @v_fma_k_f32_src_mods(float %x, float %y) {
; GFX9-LABEL: v_fma_k_f32_src_mods:
@@ -436,6 +526,13 @@ define float @v_fma_k_f32_src_mods(float %x, float %y) {
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+;
+; GFX1250-LABEL: v_fma_k_f32_src_mods:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: v_fma_f32 v0, |v0|, v1, 0x4380000...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
Review thread on these lines in SelectBuildVector:

    if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Op)) {
      Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
    } else
      Val = cast<ConstantSDNode>(Op)->getZExtValue();

Suggested change:

    if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Op))
      Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
    else
      Val = cast<ConstantSDNode>(Op)->getZExtValue();
Matt was asking to use braces when a variable is defined inside the if.
Hmm, okay, then use {} for the else too; using them on only half is not a good idea.
I'm not sure the LLVM coding standard says we need {} for a variable definition.
And again, I've been asked multiple times not to use them on the else in this situation ;)
No description provided.